#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# @Time     :2021/9/14 10:47 PM
# @Author   :khan_long
# @Email    :longkehan15@qq.com
# @File     :check.py

import jsonlines


def load_text(_path):
    """Return the ``'text'`` field of every record in a JSON Lines file.

    Args:
        _path: Path of the file, passed unchanged to ``jsonlines.open``.

    Returns:
        A list holding ``obj['text']`` for each record, in file order.

    Raises:
        KeyError: If a record is a JSON object without a ``'text'`` key.
    """
    # Use the reader as a context manager so the underlying file handle is
    # closed as soon as reading finishes (or fails), instead of lingering
    # until the garbage collector gets to it.
    with jsonlines.open(_path) as reader:
        return [obj['text'] for obj in reader]


# Texts of the three dataset splits, loaded in train / test / dev order.
train, test, dev = map(
    load_text,
    ('./train.jsonl', './test.jsonl', './dev.jsonl'),
)

# Texts of the two scaffold training sets (sections, then cite-worthiness).
sec, wor = map(
    load_text,
    (
        './scaffolds/sections-scaffold-train.jsonl',
        './scaffolds/cite-worthiness-scaffold-train.jsonl',
    ),
)


def count_percentage(l1, l2):
    """Count how many items of ``l1`` also occur in ``l2``.

    Despite the name, the result is an absolute count, not a ratio or
    percentage. Every occurrence in ``l1`` is counted, so duplicates in
    ``l1`` that appear in ``l2`` contribute once each.

    Args:
        l1: Iterable of items to look up.
        l2: Collection to search in.

    Returns:
        The number of items of ``l1`` that are members of ``l2``.
    """
    # Build the lookup set once so each membership test is O(1) on average,
    # rather than scanning the whole of ``l2`` for every item of ``l1``.
    try:
        lookup = set(l2)
    except TypeError:
        # ``l2`` holds unhashable items; keep the original linear search.
        lookup = l2

    count = 0
    for one in l1:
        try:
            found = one in lookup
        except TypeError:
            # ``one`` itself is unhashable and cannot be probed in a set;
            # compare it against ``l2`` by equality as the list search would.
            found = one in l2
        if found:
            count += 1
    return count


# Pool the train/test/dev texts once; both reports below compare against the
# same combined list, so there is no need to concatenate it for each call.
intent_texts = train + test + dev

# Report how many section-scaffold texts also occur in the pooled splits.
print(f'there are {len(sec)} ccs in section task, among which {count_percentage(sec, intent_texts)} are in '
      f'citation_intent task')

# Same report for the cite-worthiness scaffold texts.
print(f'there are {len(wor)} ccs in worthiness task, among which {count_percentage(wor, intent_texts)} are in '
      f'citation_intent task')
